****************************************************
* Date: 				21 May 2025
* Paper title: 			Betas distribution and ETF tracking error
* Program's purpose: 	Replicate tables
	
	* Disable output paging and start from an empty dataset
	set more off
	clear
	* Fix the random-number seed and the sort seed so repeated runs use the same values
	set sortseed 13579
	set seed 13579
	* Input CSV files below are referenced relative to this folder
	cd "C:\Paper_Betas\Data_Harvard_Dataverse"

	
**********************************************
********  T A B L E  1
	* Load the ETF dataset from the CSV file
		clear
		import delimited using "Dataset_for_ETFs.csv"
	* Left part of Table 1: N, mean and standard deviation of the three variables
		sum2docx tra_error_etf stdev abs_return_sp500 ///
			using "C:\Paper_Betas\Table_1a_summary_stats.docx", ///
			replace stats(N mean(%9.4f) sd(%9.4f)) title("Summary statistics")
	* Right part of Table 1: correlation between the two explanatory variables
		corr2docx stdev abs_return_sp500 ///
			using "C:\Paper_Betas\Table_1b_correlations.docx", ///
			replace star nodiagonal fmt(%9.3f)
	* End of Table 1

	
**********************************************
********  T A B L E  2
	* Open dataset
		clear
		import delimited using "Dataset_for_ETFs.csv"
	* Winsorize tracking error at the 1st and 99th percentiles, computed separately for each ETF
		foreach var of varlist tra_error_etf {
			* Temporary variables hold the cutoffs, so no permanent UPPER/LOWER columns are created or dropped
			tempvar upper lower
			bysort etf_id: egen `upper' = pctile(`var'), p(99)
			bysort etf_id: egen `lower' = pctile(`var'), p(1)
			* !missing() leaves every missing code (., .a-.z) untouched; missing values compare larger than any number
			replace `var' = `upper' if `var' > `upper' & !missing(`var')
			replace `var' = `lower' if `var' < `lower' & !missing(`var')
			drop `upper' `lower'
			}
	* Declare the data in panel-data format
		tsset etf_id time
	* Perform panel data regression with fixed-effects
		xtreg tra_error_etf stdev, fe
			* First column of the table: replace makes a re-run start a fresh file instead of depending on leftover output
			outreg2 using "C:\Paper_Betas\Table_2", excel se bdec(4) sdec(4) addstat(R2-within,`e(r2_w)') ctitle([OLS-FE]) replace
	* Perform panel data regression with fixed-effects (with control variable)
		xtreg tra_error_etf stdev abs_return_sp500, fe
			outreg2 using "C:\Paper_Betas\Table_2", excel se bdec(4) sdec(4) addstat(R2-within,`e(r2_w)') ctitle([OLS-FE]) append
	* Perform median regression with bootstrapped standard errors (250 replications)
		bsqreg tra_error_etf stdev, reps(250)
			* bsqreg's documented stored results include e(sum_adev) and e(sum_rdev) but no e(r2_p),
			* so the pseudo-R2 is computed here as 1 - (sum of deviations about the fit)/(sum of raw deviations)
			local r2_pseudo = 1 - e(sum_adev)/e(sum_rdev)
			outreg2 using "C:\Paper_Betas\Table_2", excel se bdec(4) sdec(4) addstat(R2-pseudo,`r2_pseudo') ctitle([MedReg]) append
	* Perform median regression (with control variable)
	* NOTE(review): this bsqreg call includes no fixed-effects terms - confirm that pooled median regression is intended
		bsqreg tra_error_etf stdev abs_return_sp500, reps(250)
			local r2_pseudo = 1 - e(sum_adev)/e(sum_rdev)
			outreg2 using "C:\Paper_Betas\Table_2", excel se bdec(4) sdec(4) addstat(R2-pseudo,`r2_pseudo') ctitle([MedReg]) append
	* End of Table 2
			
			
**********************************************
********  A P P E N D I X
	* Open dataset
		clear
		import delimited using "Dataset_for_MutualFunds.csv"
	* Winsorize tracking error at the 1st and 99th percentiles, computed separately for each mutual fund
		foreach var of varlist tra_error_mutual {
			* Temporary variables hold the cutoffs, so no permanent UPPER/LOWER columns are created or dropped
			tempvar upper lower
			bysort mutual_id: egen `upper' = pctile(`var'), p(99)
			bysort mutual_id: egen `lower' = pctile(`var'), p(1)
			* !missing() leaves every missing code (., .a-.z) untouched; missing values compare larger than any number
			replace `var' = `upper' if `var' > `upper' & !missing(`var')
			replace `var' = `lower' if `var' < `lower' & !missing(`var')
			drop `upper' `lower'
			}
	* Declare the data in panel-data format
		tsset mutual_id time
	* Perform panel data regression with fixed-effects
		xtreg tra_error_mutual stdev, fe
			* First column of the table: replace makes a re-run start a fresh file instead of depending on leftover output
			outreg2 using "C:\Paper_Betas\Appendix", excel se bdec(4) sdec(4) addstat(R2-within,`e(r2_w)') ctitle([OLS-FE]) replace
	* Perform panel data regression with fixed-effects (with control variable)
		xtreg tra_error_mutual stdev abs_return_sp500, fe
			outreg2 using "C:\Paper_Betas\Appendix", excel se bdec(4) sdec(4) addstat(R2-within,`e(r2_w)') ctitle([OLS-FE]) append
	* Perform median regression with bootstrapped standard errors (250 replications)
		bsqreg tra_error_mutual stdev, reps(250)
			* bsqreg's documented stored results include e(sum_adev) and e(sum_rdev) but no e(r2_p),
			* so the pseudo-R2 is computed here as 1 - (sum of deviations about the fit)/(sum of raw deviations)
			local r2_pseudo = 1 - e(sum_adev)/e(sum_rdev)
			outreg2 using "C:\Paper_Betas\Appendix", excel se bdec(4) sdec(4) addstat(R2-pseudo,`r2_pseudo') ctitle([MedReg]) append
	* Perform median regression (with control variable)
	* NOTE(review): this bsqreg call includes no fixed-effects terms - confirm that pooled median regression is intended
		bsqreg tra_error_mutual stdev abs_return_sp500, reps(250)
			local r2_pseudo = 1 - e(sum_adev)/e(sum_rdev)
			outreg2 using "C:\Paper_Betas\Appendix", excel se bdec(4) sdec(4) addstat(R2-pseudo,`r2_pseudo') ctitle([MedReg]) append
	* End of Table A1